Here, Bayesian regression is used to predict the void volume, interval and urine production rate in the third sample from the 76 men in the NHANES sample from 2009-18 who provided three voids.
Also considered were the concentrations of sodium, potassium, chloride and bicarbonate in serum, anion gap, serum creatinine, and urinary creatinine. The use of these covariates,however, reduced the number of samples from 76 to 45 and for this reason, these were not used as covariates The models used are shown below.
First, though, some data wrangling is necessary.
men_all3 <- read_csv("men_all3_ions.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
dplyr::mutate(men_all3, grp = rep(0,76))
## # A tibble: 76 x 22
## age wt ht bmi intrvl1 intrvl2 intrvl3 flow1 flow2 flow3 vol1 vol2
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 17 78.1 179. 24.3 150 75 56 0.007 0.4 3.36 1 30
## 2 11 28.3 142 14.0 160 37 23 0.138 0.243 0.739 22 9
## 3 6 22.2 120. 15.3 83 136 59 0.145 0.147 1.15 12 20
## 4 27 79.7 180. 24.6 119 40 60 0.319 0.15 0.283 38 6
## 5 7 34.7 127. 21.4 763 49 20 0.003 0.041 0.7 2 2
## 6 7 20.5 124. 13.4 51 78 36 0.49 0.09 0.639 25 7
## 7 8 48 139. 25.0 109 15 111 0.339 0.133 2.65 37 2
## 8 68 129. 182. 39.3 41 113 44 0.683 0.009 2.84 28 1
## 9 12 28 136. 15.2 41 19 59 0.463 0.579 2.37 19 11
## 10 10 38.6 135. 21.3 16 78 8 0.563 0.423 2.75 9 33
## # … with 66 more rows, and 10 more variables: vol3 <dbl>, S_osmol <dbl>,
## # SCr <dbl>, bicarb <dbl>, Cl <dbl>, Na <dbl>, Potass <dbl>, UCr <dbl>,
## # AUCr_pred <dbl>, grp <dbl>
men_all3$grp <- rep(0,76)
for (i in 1:nrow(men_all3)) {
if (men_all3$intrvl1[i] <=120) { men_all3$grp[i] <- 1}
if (men_all3$intrvl1[i] >120 && men_all3$intrvl1[i] <=240) { men_all3$grp[i] <- 2 }
if (men_all3$intrvl1[i] >240 && men_all3$intrvl1[i] <=360) { men_all3$grp[i] <- 3 }
if (men_all3$intrvl1[i] >360 && men_all3$intrvl1[i] <=480) { men_all3$grp[i] <- 4 }
if (men_all3$intrvl1[i] >480) { men_all3$grp[i] <- 5 }
}
lmen3 <- data.frame(
log_age = log(men_all3$age),
log_wt = log(men_all3$wt),
log_ht = log(men_all3$ht),
log_bmi = log(men_all3$bmi),
log_int1 = log(men_all3$intrvl1),
log_int2 = log(men_all3$intrvl2),
log_int3 = log(men_all3$intrvl3),
log_vol1 = log(men_all3$vol1),
log_vol2 = log(men_all3$vol2),
log_vol3 = log(men_all3$vol3),
log_flow1 = log(men_all3$flow1),
log_flow2 = log(men_all3$flow2),
log_flow3 = log(men_all3$flow3),
grp = men_all3$grp )
run models
if (file.exists("men_vol_mod_020321.Rds")) {
men_vol_model <- readRDS(file="men_vol_mod_020321.Rds")
} else {
men_vol_model <-
brm(data = lmen3, family = gaussian,
log_vol3 ~ (1 | grp) + log_age + log_wt + log_int1 + log_int2 + log_vol1 + log_vol2 + log_flow1 + log_flow2,
prior = c(set_prior("normal(0,1000)", class = "Intercept"),
set_prior("normal(0,100)", class = "b"),
set_prior("cauchy(0,2)", class = "sigma")),
warmup = 2000, iter = 5000, chains = 4,
control = list(adapt_delta = 0.99, max_treedepth=200))
saveRDS(men_vol_model, file="men_vol_mod_020321.Rds")
}
summary(men_vol_model)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: log_vol3 ~ (1 | grp) + log_age + log_wt + log_int1 + log_int2 + log_vol1 + log_vol2 + log_flow1 + log_flow2
## Data: lmen3 (Number of observations: 76)
## Samples: 4 chains, each with iter = 5000; warmup = 2000; thin = 1;
## total post-warmup samples = 12000
##
## Group-Level Effects:
## ~grp (Number of levels: 4)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.48 0.52 0.01 1.83 1.00 3708 5369
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 5.92 1.70 2.42 9.18 1.00 9579 8662
## log_age -0.46 0.27 -1.00 0.07 1.00 7912 7955
## log_wt 0.16 0.39 -0.60 0.92 1.00 8266 8210
## log_int1 -9.09 7.93 -24.62 6.51 1.00 4949 6705
## log_int2 45.12 19.33 6.24 82.26 1.00 5192 6886
## log_vol1 9.05 7.91 -6.46 24.48 1.00 4949 6704
## log_vol2 -45.42 19.30 -82.55 -6.61 1.00 5189 6814
## log_flow1 -9.14 7.99 -24.76 6.52 1.00 4946 6718
## log_flow2 45.56 19.36 6.57 82.77 1.00 5192 6776
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.97 0.09 0.82 1.15 1.00 10958 7967
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
if (file.exists("men_intrvl_mod_020321.Rds")) {
men_intrvl_model <- readRDS(file="men_intrvl_mod_020321.Rds")
} else {
men_intrvl_model <-
brm(data = lmen3, family = gaussian,
log_int3 ~ (1 | grp) + log_age + log_wt + log_ht + log_bmi + log_int1 + log_int2 + log_vol1 + log_vol2 + log_flow1 + log_flow2,
prior = c(set_prior("normal(0,1000)", class = "Intercept"),
set_prior("normal(0,100)", class = "b"),
set_prior("cauchy(0,2)", class = "sigma")),
warmup = 2000, iter = 5000, chains = 4,
control = list(adapt_delta = 0.99, max_treedepth=200))
saveRDS(men_intrvl_model, file="men_intrvl_mod_020321.Rds")
}
summary(men_intrvl_model)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: log_int3 ~ (1 | grp) + log_age + log_wt + log_ht + log_bmi + log_int1 + log_int2 + log_vol1 + log_vol2 + log_flow1 + log_flow2
## Data: lmen3 (Number of observations: 76)
## Samples: 4 chains, each with iter = 5000; warmup = 2000; thin = 1;
## total post-warmup samples = 12000
##
## Group-Level Effects:
## ~grp (Number of levels: 4)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.46 0.48 0.01 1.71 1.00 2439 4498
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 239.87 291.91 -335.45 810.02 1.00 4955 6338
## log_age 0.06 0.15 -0.23 0.36 1.00 12025 8305
## log_wt 25.95 31.73 -36.60 87.80 1.00 4959 6385
## log_ht -51.11 63.42 -174.92 73.83 1.00 4953 6362
## log_bmi -26.34 31.74 -88.35 36.33 1.00 4954 6411
## log_int1 -7.45 4.29 -15.75 1.15 1.00 5695 6472
## log_int2 5.42 10.70 -16.13 26.29 1.00 6313 6683
## log_vol1 7.58 4.28 -0.96 15.81 1.00 5680 6435
## log_vol2 -5.63 10.68 -26.48 15.94 1.00 6312 6590
## log_flow1 -7.67 4.32 -15.97 0.96 1.00 5681 6484
## log_flow2 5.59 10.72 -16.01 26.50 1.00 6314 6622
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.52 0.05 0.43 0.62 1.00 9371 8105
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
if (file.exists("men_flow-mod_020321.Rds")) {
men_flow_model <- readRDS(file="men_flow-mod_020321.Rds")
} else {
men_flow_model <-
brm(data = lmen3, family = gaussian,
log_flow3 ~ (1 | grp) + log_age + log_wt + log_ht + log_bmi + log_int1 + log_int2 + log_vol1 + log_vol2 + log_flow1 + log_flow2,
prior = c(set_prior("normal(0,1000)", class = "Intercept"),
set_prior("normal(0,100)", class = "b"),
set_prior("cauchy(0,2)", class = "sigma")),
warmup = 2000, iter = 5000, chains = 4,
control = list(adapt_delta = 0.99, max_treedepth=200))
saveRDS(men_flow_model, file="men_flow-mod_020321.Rds")
}
summary(men_flow_model)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: log_flow3 ~ (1 | grp) + log_age + log_wt + log_ht + log_bmi + log_int1 + log_int2 + log_vol1 + log_vol2 + log_flow1 + log_flow2
## Data: lmen3 (Number of observations: 76)
## Samples: 4 chains, each with iter = 5000; warmup = 2000; thin = 1;
## total post-warmup samples = 12000
##
## Group-Level Effects:
## ~grp (Number of levels: 4)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.44 0.48 0.01 1.73 1.00 3727 5742
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -101.68 341.17 -766.70 575.87 1.00 5979 6971
## log_age -0.51 0.28 -1.07 0.04 1.00 12271 7932
## log_wt -11.57 37.08 -83.84 61.97 1.00 5974 6886
## log_ht 22.22 74.11 -124.56 166.59 1.00 5979 6977
## log_bmi 12.33 37.09 -61.06 84.43 1.00 5974 6876
## log_int1 -1.92 8.18 -17.61 14.44 1.00 5479 6331
## log_int2 39.09 19.57 1.04 78.18 1.00 5237 6657
## log_vol1 1.78 8.16 -14.57 17.54 1.00 5480 6393
## log_vol2 -39.17 19.53 -78.17 -1.30 1.00 5239 6670
## log_flow1 -1.80 8.25 -17.63 14.67 1.00 5477 6290
## log_flow2 39.35 19.59 1.31 78.45 1.00 5240 6657
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.99 0.09 0.83 1.18 1.00 11777 8388
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
The desired result of these three models is the daytime within-person distributional parameters forf void volume, void interval and urine production rate. Each of these was modeled as a lognormal distribution with parameters mu and sigma, the log mean and log standard deviation respectively. For each individual, these two parameter are calculated from three measurements. The next chunk provides the calculations for these, both from the data and for the model predictions. Note that only the natural logarithms of the third void volume, void interval and urine production rate are the direct model outputs.
mutate(lmen3, mu_vol=rep(0,76), sig_vol=rep(0,76), mu_intrvl=rep(0,76), sig_intrvl=rep(0,76))
## log_age log_wt log_ht log_bmi log_int1 log_int2 log_int3 log_vol1
## 1 2.833213 4.357990 5.189060 3.190065 5.010635 4.317488 4.025352 0.0000000
## 2 2.397895 3.342862 4.955827 2.641198 5.075174 3.610918 3.135494 3.0910425
## 3 1.791759 3.100092 4.790820 2.728506 4.418841 4.912655 4.077537 2.4849066
## 4 3.295837 4.378270 5.193512 3.201526 4.779123 3.688879 4.094345 3.6375862
## 5 1.945910 3.546740 4.845761 3.065725 6.637258 3.891820 2.995732 0.6931472
## 6 1.945910 3.020425 4.819475 2.591516 3.931826 4.356709 3.583519 3.2188758
## 7 2.079442 3.871201 4.932313 3.216874 4.691348 2.708050 4.709530 3.6109179
## 8 4.219508 4.862908 5.201256 3.670715 3.713572 4.727388 3.784190 3.3322045
## 9 2.484907 3.332205 4.910447 2.721953 3.713572 2.944439 4.077537 2.9444390
## 10 2.302585 3.653252 4.903050 3.057298 2.772589 4.356709 2.079442 2.1972246
## 11 3.496508 4.117410 5.178971 2.969902 5.111988 4.077537 4.691348 2.9957323
## 12 2.833213 4.117410 5.150397 3.026746 6.492240 3.555348 4.442651 2.7725887
## 13 3.496508 4.617099 5.234845 3.357594 4.158883 5.111988 3.135494 3.5263605
## 14 2.833213 4.339902 5.180097 3.190065 4.624973 3.737670 4.248495 3.5263605
## 15 2.397895 3.468856 4.971894 2.735665 5.062595 2.772589 4.219508 3.0910425
## 16 4.382027 4.453184 5.156178 3.349904 4.955827 4.718499 4.127134 2.7725887
## 17 1.945910 3.269569 4.810557 2.856470 3.583519 3.091042 4.465908 2.5649494
## 18 2.079442 3.169686 4.829912 2.721295 4.127134 4.382027 4.219508 2.4849066
## 19 4.025352 4.576771 5.170484 3.446808 4.905275 4.762174 4.356709 3.0910425
## 20 3.637586 4.363099 5.182345 3.206803 4.394449 3.713572 4.615121 0.0000000
## 21 2.197225 3.538057 4.936630 2.873565 6.278521 4.770685 3.713572 3.0445224
## 22 2.772589 4.099332 5.115596 3.077312 5.141664 3.871201 4.584967 3.7135721
## 23 4.343805 4.442651 5.182345 3.288402 4.700480 4.442651 3.850148 2.8903718
## 24 3.610918 4.290459 5.135798 3.230804 4.682131 4.204693 4.477337 3.0910425
## 25 2.944439 4.203199 5.227358 2.960105 3.713572 4.709530 3.850148 2.3025851
## 26 2.890372 4.007333 5.175019 2.867899 4.672829 3.850148 4.406719 3.3672958
## 27 4.382027 3.843744 5.148657 2.753661 4.158883 4.962845 3.526361 2.6390573
## 28 2.708050 3.749504 5.057519 2.844909 4.605170 4.859812 4.465908 2.7725887
## 29 1.945910 3.186353 4.799914 2.797281 4.262680 3.401197 3.688879 2.7080502
## 30 3.258097 4.269697 5.132853 3.214868 4.477337 4.955827 3.637586 3.7376696
## 31 3.637586 4.694096 5.246498 3.411148 3.610918 4.465908 3.891820 2.4849066
## 32 1.945910 3.238678 4.847332 2.753661 4.905275 3.555348 3.555348 3.2188758
## 33 4.025352 4.371976 5.139322 3.303217 5.236442 3.258097 4.356709 3.7612001
## 34 2.197225 3.198673 4.821893 2.766319 5.068904 3.610918 4.043051 3.4657359
## 35 2.079442 3.613617 4.906015 3.010621 3.931826 4.488636 3.871201 2.7080502
## 36 3.637586 4.278054 5.218191 3.054001 4.330733 4.477337 4.343805 2.8903718
## 37 3.931826 4.080922 5.153869 2.985682 3.401197 4.927254 3.737670 2.4849066
## 38 2.079442 3.360375 4.906755 2.760010 4.553877 4.356709 3.526361 2.7725887
## 39 2.484907 3.616309 4.980863 2.862201 3.761200 4.406719 3.401197 2.4849066
## 40 4.382027 4.163560 5.172754 3.030134 4.912655 4.828314 3.871201 1.7917595
## 41 2.079442 3.546740 4.862908 3.030134 4.890349 4.077537 4.234107 2.7725887
## 42 2.197225 3.152736 4.850467 2.660260 3.496508 3.465736 3.737670 2.3025851
## 43 3.465736 4.453184 5.177279 3.310543 5.003946 4.394449 2.564949 3.7135721
## 44 2.772589 4.175925 5.170484 3.044522 5.707110 3.496508 4.043051 3.4011974
## 45 1.945910 3.095578 4.794964 2.714695 4.077537 3.663562 3.828641 2.6390573
## 46 4.174387 4.280824 5.139322 3.210844 4.852030 4.762174 4.094345 3.9889840
## 47 1.791759 3.144152 4.771532 2.809403 4.406719 3.806662 3.828641 3.1780538
## 48 1.791759 3.054001 4.805659 2.653242 4.158883 3.931826 4.605170 2.9444390
## 49 2.302585 3.481240 4.955827 2.778819 4.584967 3.332205 3.871201 3.8066625
## 50 2.890372 4.318821 5.144583 3.238678 3.912023 4.043051 4.532599 2.9444390
## 51 2.708050 4.261270 5.174453 3.122365 4.234107 4.574711 4.219508 3.3322045
## 52 2.197225 3.325036 4.964940 2.602690 3.970292 4.248495 3.951244 2.8903718
## 53 3.583519 4.701389 5.167639 3.575151 4.744932 3.663562 4.499810 3.4657359
## 54 3.465736 4.784153 5.182907 3.629660 4.532599 3.737670 2.564949 2.5649494
## 55 3.828641 4.235555 5.144000 3.157000 3.737670 4.330733 4.532599 1.9459101
## 56 3.871201 4.644391 5.207298 3.440418 4.691348 4.624973 3.663562 3.4339872
## 57 2.833213 4.107590 5.179534 2.960105 4.442651 3.951244 4.343805 2.8903718
## 58 2.397895 4.003690 5.049215 3.113515 4.828314 4.499810 3.295837 2.5649494
## 59 2.995732 4.251348 5.118592 3.222868 3.850148 4.382027 4.025352 2.3978953
## 60 1.791759 3.000720 4.736198 2.740840 4.262680 4.454347 3.178054 2.8903718
## 61 3.912023 4.077537 5.195731 2.895912 4.043051 4.605170 3.784190 2.7080502
## 62 2.397895 3.580737 4.909709 2.970414 4.919981 4.110874 4.025352 2.3978953
## 63 2.639057 4.200205 5.131672 3.148453 5.017280 0.000000 3.970292 3.6635616
## 64 2.564949 4.102643 5.118592 3.077312 3.988984 3.988984 4.304065 3.2958369
## 65 2.639057 4.135167 5.053695 3.238678 4.553877 4.158883 3.891820 3.1354942
## 66 1.945910 3.161247 4.849684 2.674149 4.663439 3.828641 3.988984 3.2958369
## 67 4.382027 4.347694 5.143416 3.269569 5.153292 4.442651 4.727388 3.4657359
## 68 4.077537 4.056989 5.083266 3.100092 4.820282 4.521789 4.718499 2.6390573
## 69 2.890372 4.070735 5.156178 2.970414 4.605170 4.025352 3.988984 2.3025851
## 70 2.772589 4.154185 5.171620 3.020425 4.595120 3.663562 4.634729 3.2580965
## 71 2.397895 3.575151 5.000585 2.785011 3.891820 4.574711 3.433987 1.9459101
## 72 4.110874 4.764735 5.203457 3.569533 5.789960 4.532599 3.663562 2.7080502
## 73 4.189655 4.453184 5.200153 3.261935 4.343805 4.804021 3.526361 3.1354942
## 74 2.484907 3.380995 4.950885 2.687847 3.044522 4.262680 4.219508 1.7917595
## 75 3.737670 4.309456 5.174453 3.169686 6.315358 4.882802 3.433987 3.4011974
## 76 3.761200 4.768988 5.202907 3.572346 4.043051 4.369448 4.343805 2.0794415
## log_vol2 log_vol3 log_flow1 log_flow2 log_flow3 grp mu_vol sig_vol
## 1 3.4011974 5.236442 -4.9618451 -0.91629073 1.21104772 2 0 0
## 2 2.1972246 2.833213 -1.9805016 -1.41469384 -0.30245736 2 0 0
## 3 2.9957323 4.219508 -1.9310215 -1.91732269 0.14236724 1 0 0
## 4 1.7917595 2.833213 -1.1425642 -1.89711998 -1.26230838 1 0 0
## 5 0.6931472 2.639057 -5.8091430 -3.19418321 -0.35667494 5 0 0
## 6 1.9459101 3.135494 -0.7133499 -2.40794561 -0.44785082 1 0 0
## 7 0.6931472 5.683580 -1.0817552 -2.01740615 0.97418221 1 0 0
## 8 0.0000000 4.828314 -0.3812604 -4.71053070 1.04415610 1 0 0
## 9 2.3978953 4.941642 -0.7700282 -0.54645280 0.86415498 1 0 0
## 10 3.4965076 3.091042 -0.5744757 -0.86038310 1.01160091 1 0 0
## 11 2.4849066 3.401197 -2.1202635 -1.59454930 -1.29098418 2 0 0
## 12 2.9444390 4.553877 -3.7297014 -0.61064596 0.11154137 5 0 0
## 13 2.1972246 3.332205 -0.6329933 -2.91877123 0.19638881 1 0 0
## 14 2.7080502 3.784190 -1.0996128 -1.03001950 -0.46362402 1 0 0
## 15 0.6931472 3.850148 -1.9732813 -2.07944154 -0.36961546 2 0 0
## 16 3.3672958 2.944439 -2.1803675 -1.35092722 -1.18417018 2 0 0
## 17 1.7917595 3.871201 -1.0188773 -1.29828348 -0.59420723 1 0 0
## 18 3.2958369 4.564348 -1.6398971 -1.08470938 0.34500714 1 0 0
## 19 2.1972246 1.386294 -1.8140051 -2.56394986 -2.97592965 2 0 0
## 20 0.6931472 5.003946 -4.4228486 -3.01593498 0.38865799 1 0 0
## 21 3.2188758 4.442651 -3.2441936 -1.55116900 0.72899683 5 0 0
## 22 0.0000000 4.574711 -1.4271164 -3.86323284 -0.01005034 2 0 0
## 23 2.4849066 1.945910 -1.8078889 -1.95899539 -1.90380897 1 0 0
## 24 3.0445224 4.007333 -1.5896353 -1.16155209 -0.47000363 1 0 0
## 25 3.1354942 2.302585 -1.4105871 -1.57503649 -1.54646311 1 0 0
## 26 2.7725887 3.135494 -1.3056365 -1.07880966 -1.27296568 1 0 0
## 27 3.2958369 2.484907 -1.5186835 -1.66600826 -1.04128722 1 0 0
## 28 3.7376696 3.663562 -1.8325815 -1.12085790 -0.80296205 1 0 0
## 29 3.0445224 5.036953 -1.5558971 -0.35667494 1.34807315 1 0 0
## 30 2.7725887 1.609438 -0.7402388 -2.18036746 -2.02495336 1 0 0
## 31 3.5553481 2.944439 -1.1270118 -0.91130319 -0.94674994 1 0 0
## 32 2.4849066 4.430817 -1.6873995 -1.07002483 0.87546874 2 0 0
## 33 2.4849066 5.288267 -1.4740333 -0.77219039 0.93137637 2 0 0
## 34 2.8903718 5.159055 -1.6044504 -0.72154666 1.11612471 2 0 0
## 35 3.4965076 3.218876 -1.2241755 -0.99155322 -0.65200524 1 0 0
## 36 3.7612001 3.610918 -1.4396951 -0.71539279 -0.73188801 1 0 0
## 37 4.0775374 4.779123 -0.9162907 -0.84863208 1.04133622 1 0 0
## 38 1.6094379 4.204693 -1.7837913 -2.74887220 0.67854103 1 0 0
## 39 3.2958369 4.584967 -1.2765435 -1.11169753 1.18387213 1 0 0
## 40 3.4011974 2.890372 -3.1235656 -1.42711636 -0.98082925 2 0 0
## 41 2.9957323 3.044522 -2.1202635 -1.08175517 -1.19072758 2 0 0
## 42 3.5263605 5.552960 -1.1940225 0.06109510 1.81531322 1 0 0
## 43 3.1780538 4.955827 -1.2909842 -1.21739582 2.39087066 2 0 0
## 44 2.9957323 5.231109 -2.3025851 -0.50087529 1.18814825 3 0 0
## 45 2.0794415 4.779123 -1.4396951 -1.58474530 0.95049890 1 0 0
## 46 0.0000000 4.189655 -0.8627500 -4.71053070 0.09531018 2 0 0
## 47 2.8332133 4.804021 -1.2275827 -0.97286108 0.97531407 1 0 0
## 48 3.4339872 4.890349 -1.2140231 -0.49758040 0.28517894 1 0 0
## 49 2.5649494 3.806662 -0.7787051 -0.76787073 -0.06400533 1 0 0
## 50 3.6375862 5.817111 -0.9675840 -0.40496523 1.28453845 1 0 0
## 51 3.6888795 3.784190 -0.9014021 -0.88673193 -0.43540898 1 0 0
## 52 3.7376696 3.931826 -1.0788097 -0.51082562 -0.01918282 1 0 0
## 53 3.0910425 4.442651 -1.2801342 -0.57270103 -0.05762911 1 0 0
## 54 3.7135721 3.091042 -1.9661129 -0.02429269 0.52591126 1 0 0
## 55 2.7080502 2.302585 -1.7897615 -1.62455155 -2.22562405 1 0 0
## 56 3.2580965 4.343805 -1.2587810 -1.36649173 0.68006194 1 0 0
## 57 3.2580965 4.564348 -1.5511690 -0.69314718 0.22074067 1 0 0
## 58 2.3978953 2.944439 -2.2633644 -2.10373423 -0.35097692 2 0 0
## 59 3.3322045 4.262680 -1.4524342 -1.04982212 0.23744086 1 0 0
## 60 3.5263605 4.477337 -1.3704210 -0.92886951 1.29937389 1 0 0
## 61 3.6888795 3.218876 -1.3356012 -0.91629073 -0.56563386 1 0 0
## 62 3.6635616 2.833213 -2.5257286 -0.44785082 -1.19072758 2 0 0
## 63 3.2958369 3.737670 -1.3547957 3.29583687 -0.23319389 2 0 0
## 64 3.2580965 4.653960 -0.6931472 -0.73188801 0.34995240 1 0 0
## 65 3.5835189 3.401197 -1.4188176 -0.57447565 -0.49102300 1 0 0
## 66 2.7725887 5.141664 -1.3664917 -1.05555280 1.15278477 1 0 0
## 67 2.9444390 3.044522 -1.6873995 -1.49610923 -1.68200861 2 0 0
## 68 3.6375862 4.025352 -2.1803675 -0.88430769 -0.69314718 2 0 0
## 69 3.5835189 3.496508 -2.3025851 -0.44161055 -0.49265832 1 0 0
## 70 2.8332133 5.690359 -1.3356012 -0.83011304 1.05570479 1 0 0
## 71 3.2580965 3.091042 -1.9449106 -1.31676830 -0.34249031 1 0 0
## 72 3.3672958 2.890372 -3.0791139 -1.16475209 -0.77219039 3 0 0
## 73 3.3322045 3.713572 -1.2073117 -1.46967597 0.18730910 1 0 0
## 74 1.6094379 3.044522 -1.2517635 -2.65926004 -1.17441400 1 0 0
## 75 1.7917595 1.098612 -2.9187712 -3.10109279 -2.33304430 5 0 0
## 76 3.9120230 3.912023 -1.9661129 -0.45728486 -0.43232256 1 0 0
## mu_intrvl sig_intrvl
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## 7 0 0
## 8 0 0
## 9 0 0
## 10 0 0
## 11 0 0
## 12 0 0
## 13 0 0
## 14 0 0
## 15 0 0
## 16 0 0
## 17 0 0
## 18 0 0
## 19 0 0
## 20 0 0
## 21 0 0
## 22 0 0
## 23 0 0
## 24 0 0
## 25 0 0
## 26 0 0
## 27 0 0
## 28 0 0
## 29 0 0
## 30 0 0
## 31 0 0
## 32 0 0
## 33 0 0
## 34 0 0
## 35 0 0
## 36 0 0
## 37 0 0
## 38 0 0
## 39 0 0
## 40 0 0
## 41 0 0
## 42 0 0
## 43 0 0
## 44 0 0
## 45 0 0
## 46 0 0
## 47 0 0
## 48 0 0
## 49 0 0
## 50 0 0
## 51 0 0
## 52 0 0
## 53 0 0
## 54 0 0
## 55 0 0
## 56 0 0
## 57 0 0
## 58 0 0
## 59 0 0
## 60 0 0
## 61 0 0
## 62 0 0
## 63 0 0
## 64 0 0
## 65 0 0
## 66 0 0
## 67 0 0
## 68 0 0
## 69 0 0
## 70 0 0
## 71 0 0
## 72 0 0
## 73 0 0
## 74 0 0
## 75 0 0
## 76 0 0
for (i in 1:nrow(lmen3)) {
lmen3$mu_vol[i] <- mean(c(lmen3$log_vol1[i], lmen3$log_vol2[i], lmen3$log_vol3[i]))
lmen3$sig_vol[i] <- sd(c(lmen3$log_vol1[i], lmen3$log_vol2[i], lmen3$log_vol3[i]))
lmen3$mu_intrvl[i] <- mean(c(lmen3$log_int1[i], lmen3$log_int2[i], lmen3$log_int3[i]))
lmen3$sig_intrvl[i] <- sd(c(lmen3$log_int1[i], lmen3$log_int2[i], lmen3$log_int3[i]))
lmen3$mu_flow[i] <- mean(c(lmen3$log_flow1[i], lmen3$log_flow2[i], lmen3$log_flow3[i]))
lmen3$sig_flow[i] <- sd(c(lmen3$log_flow1[i], lmen3$log_flow2[i], lmen3$log_flow3[i]))
}
lvol3_pred <- as.data.frame(predict(men_vol_model, probs=c(0.1,0.9)))
lint3_pred <- as.data.frame(predict(men_intrvl_model, probs=c(0.1,0.9)))
lflow3_pred <- as.data.frame(predict(men_flow_model, probs=c(0.1,0.9)))
lmen3$lvol3_pred <- lvol3_pred$Estimate
lmen3$lint3_pred <- lint3_pred$Estimate
lmen3$lflow3_pred <- lflow3_pred$Estimate
for (i in 1:nrow(lmen3)) {
lmen3$mu_vol_pred[i] <- mean(c(lmen3$log_vol1[i], lmen3$log_vol2[i], lmen3$lvol3_pred[i]))
lmen3$sig_vol_pred[i] <- sd(c(lmen3$log_vol1[i], lmen3$log_vol2[i], lmen3$lvol3_pred[i]))
lmen3$mu_intrvl_pred[i] <- mean(c(lmen3$log_int1[i], lmen3$log_int2[i], lmen3$lint3_pred[i]))
lmen3$sig_intrvl_pred[i] <- sd(c(lmen3$log_int[i], lmen3$log_int2[i], lmen3$lint3_pred[i]))
lmen3$mu_flow_pred[i] <- mean(c(lmen3$log_flow1[i], lmen3$log_flow2[i], lmen3$lflow3_pred[i]))
lmen3$sig_flow_pred[i] <- sd(c(lmen3$log_flow[i], lmen3$log_flow2[i], lmen3$lflow3_pred[i]))
}
vol3_dat_fit <- fitdist(men_all3$vol3, "lnorm")
vol3_pred_fit <- fitdist(exp(lmen3$lvol3_pred), "lnorm")
int3_dat_fit <- fitdist(men_all3$intrvl3, "lnorm")
int3_pred_fit <- fitdist(exp(lmen3$lint3_pred), "lnorm")
flow3_dat_fit <- fitdist(men_all3$flow3, "lnorm")
flow3_pred_fit <- fitdist(exp(lmen3$lflow3_pred), "lnorm")
The graphic posterior predictive check of the predicted variables is in the next chunk. First, the 76 data points and 76 predictions are plotted with the 80% two-sided prediction interval. Second, the data and the predictions are compared with violin plots.
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
#ppcheck vol
p1 <- ggplot() +
geom_point(aes(x=seq(1:76), y=vol3), data=men_all3, shape=4) +
geom_line(aes(x=seq(1:76), y= vol3), data=men_all3, alpha=0.25) +
geom_point(aes(x=seq(1:76), y=exp(Estimate)), data=lvol3_pred, color="blue") +
geom_line(aes(x=seq(1:76), y=exp(Estimate)), data=lvol3_pred, color="blue", alpha=0.25) +
geom_ribbon(aes(x=seq(1:76), ymin=exp(Q10), ymax=exp(Q90)), data=lvol3_pred, color="blue", alpha=0.2) +
scale_y_log10("3rd Void Volume")
# ppcheck interval
p2 <- ggplot() +
geom_point(aes(x=seq(1:76), y=intrvl3), data=men_all3, shape=4) +
geom_line(aes(x=seq(1:76), y= intrvl3), data=men_all3, alpha=0.25) +
geom_point(aes(x=seq(1:76), y=exp(Estimate)), data=lint3_pred, color="green") +
geom_line(aes(x=seq(1:76), y=exp(Estimate)), data=lint3_pred, color="green", alpha=0.25) +
geom_ribbon(aes(x=seq(1:76), ymin=exp(Q10), ymax=exp(Q90)), data=lint3_pred, color="green", alpha=0.2) +
scale_y_log10("3rd Void Interval")
# ppcheck flow
p3 <- ggplot() +
geom_point(aes(x=seq(1:76), y=flow3), data=men_all3, shape=4) +
geom_line(aes(x=seq(1:76), y= flow3), data=men_all3, alpha=0.25) +
geom_point(aes(x=seq(1:76), y=exp(Estimate)), data=lflow3_pred, color="yellow") +
geom_line(aes(x=seq(1:76), y=exp(Estimate)), data=lflow3_pred, color="yellow", alpha=0.25) +
geom_ribbon(aes(x=seq(1:76), ymin=exp(Q10), ymax=exp(Q90)), data=lflow3_pred, color="yellow", alpha=0.2) +
scale_y_log10("3rd Urine Flow Rate")
grid.arrange(p1,p2,p3, nrow=3, ncol=1 )
# Third vol comparison
v1 <- ggplot() +
geom_violin(aes(x=1, y=vol3), data=men_all3) +
geom_label(aes(x=0.5, y=100,
label= paste("lognormal\nmu = ",sprintf("%2.4f",vol3_dat_fit$estimate[1]),
" sigma = ", sprintf("%2.4f", vol3_dat_fit$estimate[2]))),
data=men_all3) +
geom_violin(aes(x=2, y=exp(Estimate)), data=lvol3_pred) +
geom_label(aes(x=2.5, y=30,
label=paste("lognormal\nmu = ", sprintf("%2.4f",vol3_pred_fit$estimate[1]),
" sigma = ",sprintf("%2.4f",vol3_pred_fit$estimate[2]))),
data=lvol3_pred) +
scale_x_continuous("", limits=c(0,3), breaks=c(1,2),
labels=c("NHANES 3rd\nVoid Volume", "Predicted 3rd\nVoid Volume")) +
scale_y_log10("Third Void Volume (ml)")
# thrid interval cmparison
v2 <- ggplot() +
geom_violin(aes(x=1, y=intrvl3), data=men_all3) +
geom_label(aes(x=0.5, y=30,
label= paste("lognormal\nmu = ",sprintf("%2.4f",int3_dat_fit$estimate[1]),
" sigma = ", sprintf("%2.4f", int3_dat_fit$estimate[2]))),
data=men_all3) +
geom_violin(aes(x=2, y=exp(Estimate)), data=lint3_pred) +
geom_label(aes(x=2.5, y=10,
label= paste("lognormal\nmu = ",sprintf("%2.4f",int3_pred_fit$estimate[1]),
" sigma = ", sprintf("%2.4f", int3_pred_fit$estimate[2]))),
data=men_all3) +
scale_y_log10("Third Void Interval (min)") +
scale_x_continuous("", limits=c(0,3), breaks=c(1,2), labels=c("NHANES 3rd\nVoid Interval", "Predicted 3rd\nVoid Interval"))
# third flow comparison
v3 <- ggplot() +
geom_violin(aes(x=1, y=flow3), data=men_all3) +
geom_label(aes(x=0.5, y=1,
label= paste("lognormal\nmu = ",sprintf("%2.4f",flow3_dat_fit$estimate[1]),
" sigma = ", sprintf("%2.4f", flow3_dat_fit$estimate[2]))),
data=men_all3) +
geom_violin(aes(x=2, y=exp(Estimate)), data=lflow3_pred) +
geom_label(aes(x=2.5, y=0.1,
label= paste("lognormal\nmu = ",sprintf("%2.4f",flow3_pred_fit$estimate[1]),
" sigma = ", sprintf("%2.4f", flow3_pred_fit$estimate[2]))),
data=men_all3) +
scale_y_log10("Third Urine Flow (ml/min)") +
scale_x_continuous("", limits=c(0,3), breaks=c(1,2), labels=c("NHANES 3rd\nFlow Rate", "Predicted 3rd\nFlow Rate"))
print(v1)
print(v2)
print(v3)
What we really want is the within person summary statistics
# log mean vol comparison
library(ggthemes)
mu_vol_fit <- fitdist(lmen3$mu_vol, "norm")
mu_vol_pred_fit <- fitdist(lmen3$mu_vol_pred, "norm")
vol_p1 <- ggplot(lmen3) +
geom_violin(aes(x=1, y=mu_vol)) +
geom_label(aes(x=0.5, y=1.8,
label= paste("normal\nmu = ",sprintf("%2.4f",mu_vol_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",mu_vol_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=2, y=mu_vol_pred)) +
geom_label(aes(x=2.5, y=1.6,
label= paste("normal\nmu = ",sprintf("%2.4f",mu_vol_pred_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",mu_vol_pred_fit$estimate[2]))),
data=lmen3, size=3) +
scale_x_continuous(limits=c(0,3), breaks=c(1,2), labels=c("data", "pred_volume")) +
scale_y_continuous("log mean volume per individual") +
theme_economist_white()
# sigma vol comparison
sig_vol_fit <- fitdist(lmen3$sig_vol, "gamma")
sig_vol_pred_fit <- fitdist(lmen3$sig_vol_pred, "gamma")
vol_p2 <- ggplot(lmen3) +
geom_violin(aes(x=1, y=sig_vol)) +
geom_label(aes(x=0.5, y=2,
label= paste("gamma\nshape = ",sprintf("%2.4f",sig_vol_fit$estimate[1]),
"\nrate = ",sprintf("%2.4f",sig_vol_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=2, y=sig_vol_pred)) +
geom_label(aes(x=2.5, y=1.5,
label= paste("gamma\nshape = ",sprintf("%2.4f",sig_vol_pred_fit$estimate[1]),
"\nrate = ",sprintf("%2.4f",sig_vol_pred_fit$estimate[2]))),
data=lmen3, size=3) +
scale_x_continuous(limits=c(0,3), breaks=c(1,2), labels=c("data", "pred_volume")) +
scale_y_continuous("sigma volume per individual") +
theme_economist_white()
grid.arrange(vol_p1, vol_p2, ncol=2)
# clog mean interval comparison
mu_int_fit <- fitdist(lmen3$mu_intrvl, "norm")
mu_int_pred_fit <- fitdist(lmen3$mu_intrvl_pred, "norm")
int_p1 <- ggplot(lmen3) +
geom_violin(aes(x=1, y=mu_intrvl)) +
geom_label(aes(x=0.5, y=3.5,
label= paste("normal\nmu = ",sprintf("%2.4f",mu_int_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",mu_vol_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=2, y=mu_intrvl_pred)) +
geom_label(aes(x=2.5, y=3.6,
label= paste("normal\nmu = ",sprintf("%2.4f",mu_int_pred_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",mu_int_pred_fit$estimate[2]))),
data=lmen3, size=3) +
scale_x_continuous(limits=c(0,3), breaks=c(1,2), labels=c("data", "pred_volume")) +
scale_y_continuous("log mean void interval per individual") +
theme_economist_white()
# sigma interval comparison
sig_int_fit <- fitdist(lmen3$sig_intrvl, "gamma")
sig_int_pred_fit <- fitdist(lmen3$sig_intrvl_pred, "gamma")
int_p2 <- ggplot(lmen3) +
geom_violin(aes(x=1, y=sig_intrvl)) +
geom_label(aes(x=0.5, y=2.5,
label= paste("gamma\nshape = ",sprintf("%2.4f",sig_vol_fit$estimate[1]),
"\nrate = ",sprintf("%2.4f",sig_vol_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=2, y=sig_intrvl_pred)) +
geom_label(aes(x=2.5, y=2.6,
label= paste("gamma\nshape = ",sprintf("%2.4f",sig_vol_pred_fit$estimate[1]),
"\nrate = ",sprintf("%2.4f",sig_vol_pred_fit$estimate[2]))),
data=lmen3, size=3) +
scale_x_continuous(limits=c(0,3), breaks=c(1,2), labels=c("data", "pred_volume")) +
scale_y_continuous("sigma void interval per individual") +
theme_economist_white()
grid.arrange(int_p1, int_p2, ncol=2)
# log mean flow comparison
mu_flow_fit <- fitdist(lmen3$mu_flow, "norm")
mu_flow_pred_fit <- fitdist(lmen3$mu_flow_pred, "norm")
flow_p1 <- ggplot(lmen3) +
geom_violin(aes(x=1, y=mu_flow)) +
geom_label(aes(x=0.7, y=0.6,
label= paste("lognormal\nmu = ",sprintf("%2.4f",mu_flow_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",mu_vol_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=2, y=mu_flow_pred)) +
geom_label(aes(x=2.5, y=0.5,
label= paste("lognormal\nmu = ",sprintf("%2.4f",mu_flow_pred_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",mu_int_pred_fit$estimate[2]))),
data=lmen3, size=3) +
scale_x_continuous(limits=c(0,3), breaks=c(1,2), labels=c("data", "pred_volume")) +
scale_y_continuous("log mean flow per individual") +
theme_economist_white()
# sigma flow comparison
sig_flow_fit <- fitdist(lmen3$sig_flow, "gamma")
sig_flow_pred_fit <- fitdist(lmen3$sig_flow_pred, "gamma")
flow_p2 <- ggplot(lmen3) +
geom_violin(aes(x=1, y=sig_flow)) +
geom_label(aes(x=0.5, y=.5,
label= paste("gamma\nshape = ",sprintf("%2.4f",sig_flow_fit$estimate[1]),
"\nrate = ",sprintf("%2.4f",sig_flow_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=2.2, y=sig_vol_pred)) + geom_label(aes(x=2.5, y=1.5,
label= paste("gamma\nshape = ",sprintf("%2.4f",sig_flow_pred_fit$estimate[1]),
"\nrate = ",sprintf("%2.4f",sig_flow_pred_fit$estimate[2]))),
data=lmen3, size=3) +
scale_x_continuous(limits=c(0,3), breaks=c(1,2), labels=c("data", "pred_volume")) +
scale_y_continuous("sigma flow per individual") +
theme_economist_white()
grid.arrange(flow_p1, flow_p2, ncol=2)
The values in this larger data set are predicted to obtain values for the log mean and log sd of the volume and interval
##########################################
# predictions from brm
########################################
men_only2 <- read_csv("men_only2a.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## ID = col_double(),
## age = col_double(),
## wt = col_double(),
## ht = col_double(),
## bmi = col_double(),
## int1 = col_double(),
## int2 = col_double(),
## vol1 = col_double(),
## vol2 = col_double(),
## flow1 = col_double(),
## flow2 = col_double()
## )
men_only2$grp <- rep(0,1856)
for (i in 1:length(men_only2$int1)) {
if (men_only2$int1[i] <= 120) { men_only2$grp[i] <- 1}
if (men_only2$int1[i] >120 && men_only2$int1[i] <=240) { men_only2$grp[i] <- 2 }
if (men_only2$int1[i] >240 && men_only2$int1[i] <=360) { men_only2$grp[i] <- 3 }
if (men_only2$int1[i] >360 && men_only2$int1[i] <=480) { men_only2$grp[i] <- 4 }
if (men_only2$int1[i] >480) { men_only2$grp[i] <- 5 }
}
lmen2 <- data.frame(
log_age = log(men_only2$age),
log_wt = log(men_only2$wt),
log_ht = log(men_only2$ht),
log_bmi = log(men_only2$bmi),
log_int1 = log(men_only2$int1),
log_int2 = log(men_only2$int2),
log_vol1 = log(men_only2$vol1),
log_vol2 = log(men_only2$vol2),
log_flow1 = log(men_only2$flow1),
log_flow2 = log(men_only2$flow2),
grp = men_only2$grp)
larger_vol_pred <- as_tibble(predict(men_vol_model, newdata = lmen2, allow_new_levels=TRUE))
larger_int_pred <- as_tibble(predict(men_intrvl_model, newdata = lmen2, allow_new_levels=TRUE))
larger_flow_pred <- as_tibble(predict(men_flow_model, newdata = lmen2, allow_new_levels=TRUE))
lmen_added3 <- mutate(lmen2,
log_vol3 = larger_vol_pred$Estimate,
log_int3 = larger_int_pred$Estimate,
log_flow3 = larger_flow_pred$Estimate )
for (i in 1:length(lmen_added3$log_vol1)) {
lmen_added3$mu_vol[i] <- mean(c(lmen_added3$log_vol1[i], lmen_added3$log_vol2[i], lmen_added3$log_vol3[i]))
lmen_added3$sig_vol[i] <- sd(c(lmen_added3$log_vol1[i], lmen_added3$log_vol2[i], lmen_added3$log_vol3[i]))
lmen_added3$mu_intrvl[i] <- mean(c(lmen_added3$log_int1[i], lmen_added3$log_int2[i], lmen_added3$log_int3[i]))
lmen_added3$sig_intrvl[i] <- sd(c(lmen_added3$log_int1[i], lmen_added3$log_int2[i], lmen_added3$log_int3[i]))
lmen_added3$mu_flow[i] <- mean(c(lmen_added3$log_flow1[i], lmen_added3$log_flow2[i], lmen_added3$log_flow3[i]))
lmen_added3$sig_flow[i] <- sd(c(lmen_added3$log_flow1[i], lmen_added3$log_flow2[i], lmen_added3$log_flow3[i]))
}
Here, the predictions for between-person variation in log means and log standard deviations for void volume, void interval and urine production rate are fit to normal distributions for the log means and gamma distributions for the log standard deviations.
## mu volume
mu_vol_bigpred_fit <- fitdist(lmen_added3$mu_vol, "norm")
vol2_p1 <- ggplot() +
geom_violin(aes(x=1, y=mu_vol), data=lmen3) +
geom_label(aes(x=0.5, y=2, label = paste("lognormal\nmu = ",sprintf("%2.4f",mu_vol_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",mu_vol_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=2, y=mu_vol_pred), data=lmen3) +
geom_label(aes(x=2, y=2,
label= paste("lognormal\nmu = ",sprintf("%2.4f",mu_vol_pred_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",mu_vol_pred_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=3, y=mu_vol), data=lmen_added3) +
geom_label(aes(x=3.5, y=2,
label= paste("lognormal\nmu = ",sprintf("%2.4f",mu_vol_pred_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",mu_vol_pred_fit$estimate[2]))),
data=lmen3, size=3) +
scale_x_continuous(limits=c(0,4), breaks=c(1,2,3), labels=c("mu_vol_data", "pred_mu", "larger_pred_mu")) +
scale_y_continuous("mu volume per individual")
## sigma volume
sig_vol_bigpred_fit <- fitdist(lmen_added3$sig_vol, "gamma")
vol2_p2 <- ggplot() +
geom_violin(aes(x=1, y=sig_vol), data = lmen3) +
geom_label(aes(x=0.5, y=2,
label= paste("gamma\nshape = ",sprintf("%2.4f",sig_vol_fit$estimate[1]),
"\nrate = ",sprintf("%2.4f",sig_vol_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=2, y=sig_vol_pred), data=lmen3) +
geom_label(aes(x=2, y=2,
label= paste("gamma\nshape = ",sprintf("%2.4f",sig_vol_pred_fit$estimate[1]),
"\nrate = ",sprintf("%2.4f",sig_vol_pred_fit$estimate[2]))),
data=lmen_added3, size=3) +
geom_violin(aes(x=3, y=sig_vol), data=lmen_added3) +
geom_label(aes(x=3.5, y=2,
label= paste("gamma\nshape = ",sprintf("%2.4f",sig_vol_bigpred_fit$estimate[1]),
"\nrate = ",sprintf("%2.4f",sig_vol_bigpred_fit$estimate[2]))),
data=lmen_added3, size=3) +
scale_x_continuous(limits=c(0,4), breaks=c(1,2,3), labels=c("sig_vol_data", "pred_sig", "larger_pred_sig")) +
scale_y_continuous("sigma volume per individual")
grid.arrange(vol2_p1, vol2_p2, ncol=2)
## mu interval
mu_int_bigpred_fit <- fitdist(lmen_added3$mu_vol, "norm")
int2_p1 <- ggplot() +
geom_violin(aes(x=1, y=mu_intrvl), data=lmen3) +
geom_label(aes(x=0.5, y=3, label = paste("lognormal\nmu = ",sprintf("%2.4f",mu_int_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",mu_int_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=2, y=mu_intrvl_pred), data=lmen3) +
geom_label(aes(x=2, y=3, label = paste("lognormal\nmu = ",sprintf("%2.4f",mu_int_pred_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",mu_int_pred_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=3, y=mu_intrvl), data=lmen_added3) +
geom_label(aes(x=3.5, y=3, label = paste("lognormal\nmu = ",sprintf("%2.4f",mu_int_bigpred_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",mu_int_pred_fit$estimate[2]))),
data=lmen_added3, size=3) +
scale_x_continuous(limits=c(0,4), breaks=c(1,2,3), labels=c("mu_int_data", "pred_mu", "larger_pred_mu")) +
scale_y_continuous("mu intrvl per individual")
## sigma interval
sig_int_bigpred_fit <- fitdist(lmen_added3$sig_intrvl, "gamma")
int2_p2 <- ggplot() +
geom_violin(aes(x=1, y=sig_intrvl), data = lmen3) +
geom_label(aes(x=0.5, y=2.3,
label= paste("gamma\nshape = ",sprintf("%2.4f",sig_int_fit$estimate[1]),
"\nrate = ",sprintf("%2.4f",sig_int_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=2, y=sig_intrvl_pred), data=lmen3) +
geom_label(aes(x=2, y=1.8,
label= paste("gamma\nshape = ",sprintf("%2.4f",sig_int_pred_fit$estimate[1]),
"\nrate = ",sprintf("%2.4f",sig_int_pred_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=3, y=sig_intrvl), data=lmen_added3) +
geom_label(aes(x=3.5, y=1.3,
label= paste("gamma\nshape = ",sprintf("%2.4f",sig_int_bigpred_fit$estimate[1]),
"\nrate = ",sprintf("%2.4f",sig_int_bigpred_fit$estimate[2]))),
data=lmen_added3, size=3) +
scale_x_continuous(limits=c(0,4), breaks=c(1,2,3), labels=c("sig_int_data", "pred_sig", "larger_pred_sig")) +
scale_y_continuous("sigma interval per individual")
grid.arrange(int2_p1, int2_p2, ncol=2)
## mu flow
mu_flow_bigpred_fit <- fitdist(lmen_added3$mu_flow, "norm")
flow2_p1 <- ggplot() +
geom_violin(aes(x=1, y=mu_flow), data=lmen3) +
geom_label(aes(x=0.5, y=-2.5, label = paste("lognormal\nmu = ",sprintf("%2.4f",mu_flow_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",mu_flow_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=2, y=mu_flow_pred), data=lmen3) +
geom_label(aes(x=2, y=-2.5, label = paste("lognormal\nmu = ",sprintf("%2.4f",mu_flow_pred_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",mu_flow_pred_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=3, y=mu_flow), data=lmen_added3) +
geom_label(aes(x=3.5, y=-2.5, label = paste("lognormal\nmu = ",sprintf("%2.4f",mu_flow_bigpred_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",mu_flow_bigpred_fit$estimate[2]))),
data=lmen_added3, size=3) +
scale_x_continuous(limits=c(0,4), breaks=c(1,2,3), labels=c("mu_flow_data", "pred_mu", "larger_pred_mu")) +
scale_y_continuous("mu flow per individual")
## sigma flow
sig_flow_bigpred_fit <- fitdist(lmen_added3$sig_flow, "gamma")
flow2_p2 <- ggplot() +
geom_violin(aes(x=1, y=sig_flow), data = lmen3) +
geom_label(aes(x=0.5, y=1,
label= paste("gamma\nshape = ",sprintf("%2.4f",sig_flow_fit$estimate[1]),
"\nrate = ",sprintf("%2.4f",sig_flow_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=2, y=sig_flow_pred), data=lmen3) +
geom_label(aes(x=2, y=1,
label= paste("gamma\nshape = ",sprintf("%2.4f",sig_flow_pred_fit$estimate[1]),
"\nrate = ",sprintf("%2.4f",sig_flow_pred_fit$estimate[2]))),
data=lmen3, size=3) +
geom_violin(aes(x=3, y=sig_flow), data=lmen_added3) +
geom_label(aes(x=3.5, y=1,
label= paste("gamma\nshape = ",sprintf("%2.4f",sig_flow_bigpred_fit$estimate[1]),
"\nrate = ",sprintf("%2.4f",sig_flow_bigpred_fit$estimate[2]))),
data=lmen_added3, size=3) +
scale_x_continuous(limits=c(0,4), breaks=c(1,2,3), labels=c("sig_flow_data", "pred_sig", "larger_pred_sig")) +
scale_y_continuous("sigma flow per individual")
grid.arrange(flow2_p1, flow2_p2, ncol=2)
Here, the smaller sample of 76 men with 3 voids from which the prediction model was developed and the larger sample of men with two voids are combined to create a dataset of 1932 men. This dataset contains males from 6 to 80 years as well as a range o body mass and bmi. From this dataset, selections can be made to obtain a hypothetical population similar to a study population with known biometric characteristics.
men_all <- data.frame(age = c(men_all3$age, men_only2$age),
wt = c(men_all3$wt, men_only2$wt),
ht = c(men_all3$ht, men_only2$ht),
bmi = c(men_all3$bmi, men_only2$bmi),
mu_vol = c(lmen3$mu_vol, lmen_added3$mu_vol),
sig_vol = c(lmen3$sig_vol, lmen_added3$sig_vol),
mu_int = c(lmen3$mu_intrvl, lmen_added3$mu_intrvl),
sig_int = c(lmen3$sig_intrvl, lmen_added3$sig_intrvl),
mu_flow = c(lmen3$mu_flow, lmen_added3$mu_flow),
sig_flow = c(lmen3$sig_flow, lmen_added3$sig_flow)
)
write_csv(men_all, "men_all_020721.csv")
W_mu_vol <- fitdist(men_all$mu_vol, "norm")
summary(W_mu_vol)
## Fitting of the distribution ' norm ' by maximum likelihood
## Parameters :
## estimate Std. Error
## mean 3.8093540 0.009004540
## sd 0.3957902 0.006366988
## Loglikelihood: -950.6746 AIC: 1905.349 BIC: 1916.482
## Correlation matrix:
## mean sd
## mean 1 0
## sd 0 1
ggplot() +
stat_ecdf(aes(mu_vol), data=men_all, geom="point", size=3, shape=4, alpha=0.2) +
stat_ecdf(aes(rnorm(nrow(men_all), W_mu_vol$estimate[1], W_mu_vol$estimate[2])), geom="step", colour="blue", size=0.9) +
labs(title = "within person log mean void volume", x="log mean void volume", y="cumulative frequency")
W_sig_vol <- fitdist(men_all$sig_vol, "gamma")
summary(W_sig_vol)
## Fitting of the distribution ' gamma ' by maximum likelihood
## Parameters :
## estimate Std. Error
## shape 3.419945 0.1051106
## rate 4.493335 0.1487532
## Loglikelihood: -823.6231 AIC: 1651.246 BIC: 1662.379
## Correlation matrix:
## shape rate
## shape 1.0000000 0.9283883
## rate 0.9283883 1.0000000
ggplot() +
stat_ecdf(aes(sig_vol), data=men_all, geom="point", size=3, shape=4, alpha=0.2) +
stat_ecdf(aes(rgamma(nrow(men_all), W_sig_vol$estimate[1], W_sig_vol$estimate[2])), geom="step", colour="blue", size=0.9) +
labs(title = "within-person sigma void volume", x="sigma void volume", y="cumulative frequency")
W_mu_int <- fitdist(men_all$mu_int, "norm")
summary(W_mu_int)
## Fitting of the distribution ' norm ' by maximum likelihood
## Parameters :
## estimate Std. Error
## mean 4.3042542 0.005861938
## sd 0.2576587 0.004144735
## Loglikelihood: -121.3664 AIC: 246.7328 BIC: 257.8655
## Correlation matrix:
## mean sd
## mean 1.000000e+00 -1.294761e-13
## sd -1.294761e-13 1.000000e+00
ggplot() +
stat_ecdf(aes(mu_int), data=men_all, geom="point", size=3, shape=4, alpha=0.2) +
stat_ecdf(aes(rnorm(nrow(men_all), W_mu_int$estimate[1], W_mu_int$estimate[2])), geom="step", colour="blue", size=0.9) +
labs(title = "within-person log mean void interval", x="log mean void void interval", y="cumulative frequency")
W_sig_int <- fitdist(men_all$sig_int, "gamma")
summary(W_sig_int)
## Fitting of the distribution ' gamma ' by maximum likelihood
## Parameters :
## estimate Std. Error
## shape 5.607287 0.1753075
## rate 9.134002 0.2987475
## Loglikelihood: -13.16441 AIC: 30.32883 BIC: 41.46145
## Correlation matrix:
## shape rate
## shape 1.0000000 0.9558825
## rate 0.9558825 1.0000000
ggplot() +
stat_ecdf(aes(sig_int), data=men_all, geom="point", size=3, shape=4, alpha=0.2) +
stat_ecdf(aes(rgamma(nrow(men_all), shape=W_sig_int$estimate[1], rate=W_sig_int$estimate[2])), geom="step", colour="blue", size=0.9) +
labs(title = "within-person sigma void interval", x="sigma void interval", y="cumulative frequency")
W_mu_flow <- fitdist(men_all$mu_flow, "norm")
summary(W_mu_flow)
## Fitting of the distribution ' norm ' by maximum likelihood
## Parameters :
## estimate Std. Error
## mean -0.4863633 0.011076080
## sd 0.4868438 0.007831823
## Loglikelihood: -1350.713 AIC: 2705.425 BIC: 2716.558
## Correlation matrix:
## mean sd
## mean 1 0
## sd 0 1
ggplot() +
stat_ecdf(aes(mu_flow), data=men_all, geom="point", size=3, shape=4, alpha=0.2) +
stat_ecdf(aes(rnorm(nrow(men_all), W_mu_flow$estimate[1], W_mu_flow$estimate[2])), geom="step", colour="blue", size=0.9) +
labs(title = "within-person log mean urine production", x="log mean void urine production", y="cumulative frequency")
W_sig_flow <- fitdist(men_all$sig_flow, "gamma")
summary(W_sig_flow)
## Fitting of the distribution ' gamma ' by maximum likelihood
## Parameters :
## estimate Std. Error
## shape 2.330229 0.07027057
## rate 3.086750 0.10383195
## Loglikelihood: -1073.909 AIC: 2151.818 BIC: 2162.951
## Correlation matrix:
## shape rate
## shape 1.0000000 0.8964895
## rate 0.8964895 1.0000000
ggplot() +
stat_ecdf(aes(sig_flow), data=men_all, geom="point", size=3, shape=4, alpha=0.2) +
stat_ecdf(aes(rgamma(nrow(men_all), W_sig_flow$estimate[1], W_sig_flow$estimate[2])), geom="step", colour="blue", size=0.9) +
labs(title = "within-person sigma urine productionl", x="sigma urine production", y="cumulative frequency")
#men_all_clean <- men_all[-which(is.na(men_all))]
men_cor <- cor(men_all, method="spearman")
print(men_cor, digits=2)
## age wt ht bmi mu_vol sig_vol mu_int sig_int mu_flow
## age 1.000 0.5802 0.458 0.5429 -0.199 0.0311 0.162 0.082 -0.2310
## wt 0.580 1.0000 0.721 0.9355 0.026 -0.0287 0.053 0.135 0.0067
## ht 0.458 0.7206 1.000 0.4747 0.043 -0.0474 0.100 -0.023 -0.0279
## bmi 0.543 0.9355 0.475 1.0000 0.016 -0.0071 0.026 0.190 0.0224
## mu_vol -0.199 0.0261 0.043 0.0164 1.000 0.3623 -0.058 0.090 0.8254
## sig_vol 0.031 -0.0287 -0.047 -0.0071 0.362 1.0000 -0.047 0.176 0.3112
## mu_int 0.162 0.0531 0.100 0.0263 -0.058 -0.0470 1.000 0.444 -0.5657
## sig_int 0.082 0.1345 -0.023 0.1903 0.090 0.1756 0.444 1.000 -0.1643
## mu_flow -0.231 0.0067 -0.028 0.0224 0.825 0.3112 -0.566 -0.164 1.0000
## sig_flow -0.431 -0.3029 -0.329 -0.2352 0.078 0.4176 0.255 0.145 -0.0879
## sig_flow
## age -0.431
## wt -0.303
## ht -0.329
## bmi -0.235
## mu_vol 0.078
## sig_vol 0.418
## mu_int 0.255
## sig_int 0.145
## mu_flow -0.088
## sig_flow 1.000
lmen3$sig_vol_ratio <- rep(0,76)
lmen3$sig_int_ratio <- rep(0,76)
for (i in 1:76) { lmen3$sig_vol_ratio[i] <- lmen3$sig_vol[i]/lmen3$sig_vol_pred[i]
lmen3$sig_int_ratio[i] <- lmen3$sig_intrvl[i]/lmen3$sig_intrvl_pred[i] }
The two studies selected are Smolders et al. (2014) and Parsons et al. (2007). The stated purposes of these studies were to further the understanding of the effects of inter- and intra-individual variation human biomonitoring studies and to investigate bladder diary measurements from asymptomatic individuals during both day and night as well as the relationship with age.
library(readxl)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
Parsons_men <- as_tibble(read_xlsx("Urine paper Dec 2020/Parsons-extract.xlsx", sheet="Men"))
## New names:
## * `` -> ...1
## * `` -> ...15
## * `` -> ...16
## * `` -> ...17
## * `` -> ...18
Smolders <- read_csv("Urine paper Dec 2020/Smolders.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Subject = col_double(),
## DOW = col_character(),
## Code = col_character(),
## Age = col_double(),
## Gender = col_character(),
## HT = col_double(),
## wt = col_double(),
## Time = col_time(format = ""),
## interval_h = col_double(),
## total_time = col_double(),
## vol = col_double(),
## Cr = col_double(),
## SG = col_double()
## )
S2 <- dplyr::filter(Smolders, Subject==2, as.numeric(as.POSIXct(Time))>=25200 && as.numeric(as.POSIXct(Time))<=83000)
S2m <- mean(S2$vol)
S4 <- dplyr::filter(Smolders, Subject==4, as.numeric(as.POSIXct(Time))>=25200 && as.numeric(as.POSIXct(Time))<=83000)
S4m <- mean(S4$vol)
S6 <- dplyr::filter(Smolders, Subject==6, as.numeric(as.POSIXct(Time))>=25200 && as.numeric(as.POSIXct(Time))<=83000)
S6m <- mean(S6$vol)
S8 <- dplyr::filter(Smolders, Subject==8, as.numeric(as.POSIXct(Time))>=25200 && as.numeric(as.POSIXct(Time))<=83000)
S8m <- mean(S8$vol)
df1 <- data.frame(x=rep(3,4), vol=c(S2m, S4m, S6m, S8m))
Pv_fit <- fitdist(Parsons_men$MeanVol_day, "lnorm")
Bv_fit <- fitdist(exp(men_all$mu_vol + 0.5*men_all$sig_vol^2), "lnorm")
Sv_fit <- fitdist(df1$vol, "lnorm")
ggplot() +
geom_violin(aes(x=1, y=MeanVol_day), data=Parsons_men) +
geom_violin(aes(x=2, y=exp(mu_vol + 0.5*sig_vol^2)), data= men_all) +
geom_point(aes(x=x, y=vol), data=df1, shape=4, size=3) +
scale_x_continuous(limits=c(0,4), breaks=c(1,2,3), labels=c("Parsons et al.", "NHANES", "Smolders et al.")) +
scale_y_log10("Mean day_time Void Volume") +
geom_label(aes(x=1, y=10, label=paste("lognormal\nmu = ",sprintf("%2.4f",Pv_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",Pv_fit$estimate[2]))), data=Parsons_men) +
geom_label(aes(x=2, y=10, label=paste("lognormal\nmu = ",sprintf("%2.4f",Bv_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",Bv_fit$estimate[2]))), data= men_all) +
geom_label(aes(x=3, y=10, label=paste("lognormal\nmu = ",sprintf("%2.4f",Sv_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",Sv_fit$estimate[2]))), data=df1)
S2i <- mean(S2$interval_h*60)
S4i <- mean(S4$interval_h*60)
S6i <- mean(S6$interval_h*60)
S8i <- mean(S8$interval_h*60)
df2 <- data.frame(x=rep(3,4), int=c(S2i, S4i, S6i, S8i))
Pi_fit <- fitdist(16*60/Parsons_men$F_day, "lnorm")
Bi_fit <- fitdist(exp(men_all$mu_int + 0.5*men_all$sig_int^2), "lnorm")
Si_fit <- fitdist(df2$int, "lnorm")
ggplot() +
geom_violin(aes(x=1, y=12*60/F_day), data=Parsons_men) +
geom_violin(aes(x=2, y=exp(mu_int + 0.5*sig_flow^2)), data= men_all) +
geom_point(aes(x=x, y=int), data=df2, shape=4, size=3) +
scale_x_continuous(limits=c(0,4), breaks=c(1,2,3), labels=c("Parsons et al.", "NHANES", "Smolders et al.")) +
scale_y_log10("Mean daytime Void Interval") +
geom_label(aes(x=1, y=3000, label=paste("lognormal\nmu = ",sprintf("%2.4f",Pi_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",Pi_fit$estimate[2]))), data=Parsons_men) +
geom_label(aes(x=2, y=3000, label=paste("lognormal\nmu = ",sprintf("%2.4f",Bi_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",Bi_fit$estimate[2]))), data= men_all) +
geom_label(aes(x=3, y=3000, label=paste("lognormal\nmu = ",sprintf("%2.4f",Si_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",Si_fit$estimate[2]))), data=df2)
S2f <- mean(S2$vol/(S2$interval_h*60))
S4f <- mean(S4$vol/(S4$interval_h*60))
S6f <- mean(S6$vol/(S6$interval_h*60))
S8f <- mean(S8$vol/(S8$interval_h*60))
df3 <- data.frame(x=rep(3,4), flow=c(S2f, S4f, S6f, S8f))
Pf_fit <- fitdist(Parsons_men$Flow_day, "lnorm")
Bf_fit <- fitdist(exp(men_all$mu_flow + 0.5*men_all$sig_flow^2), "lnorm")
Sf_fit <- fitdist(df3$flow, "lnorm")
ggplot() +
geom_violin(aes(x=1, y=Flow_day), data=Parsons_men) +
geom_violin(aes(x=2, y=exp(mu_flow + 0.5*sig_flow^2)), data= men_all) +
geom_point(aes(x=x, y=flow), data=df3, shape=4, size=3) +
scale_x_continuous(limits=c(0,4), breaks=c(1,2,3), labels=c("Parsons et al.", "NHANES", "Smolders et al.")) +
scale_y_log10("Mean daytime Urine production") +
geom_label(aes(x=1, y=30, label=paste("lognormal\nmu = ",sprintf("%2.4f",Pf_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",Pf_fit$estimate[2]))), data=Parsons_men) +
geom_label(aes(x=2, y=30, label=paste("lognormal\nmu = ",sprintf("%2.4f",Bf_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",Bf_fit$estimate[2]))), data= men_all) +
geom_label(aes(x=3, y=30, label=paste("lognormal\nmu = ",sprintf("%2.4f",Sf_fit$estimate[1]),
"\nsigma = ",sprintf("%2.4f",Sf_fit$estimate[2]))), data=df3)